/*
HOSPITAL CORPORATIZATION

THIS FILE LOADS HOSPITAL INPATIENT CLAIMS FOR ALL PATIENTS IN ALL HOSPITALS, FLAGS WHETHER EACH ADMISSION IS NON-DEFERRABLE AND WHETHER IT CAME THROUGH THE ED, AND RETAINS THE NON-DEFERRABLE ADMISSIONS. IT DROPS PATIENTS UNDER AGE 65 AT ADMISSION AND FLAGS (DOES NOT DROP) BENEFICIARIES WHO HAVE ONE YEAR OF ENROLLMENT HISTORY BEFORE ADMISSION. WORKS OFF THE BASE INPATIENT FILE.


FIRST CREATED: SEPT 22, 2022 from v1 of same name;
LAST UPDATED: DEC 24, 2023

DEC 24, 2023;
- PREPARE FOR REPLICATION PACKAGE;

*/

*Session setup: clear memory, set the project paths, load the helper programs,
*and define the study window and the variable lists read from the raw files;
clear all
set more off
*set trace on 
global datapath "<folder containing project data files>"
global claims "<folder containing claims files>"

*The master program file is located through the codepath global, which this
*file did not define. Set it only when it is still empty, so that a wrapper
*script that already defines it keeps working unchanged;
if `"$codepath"' == "" {
	global codepath "<folder containing project code files>"
}

*Run master program file;
*NOTE(review): ident_nondef, called in Step 2, is not defined in this file and
*is presumably loaded here - confirm;
qui do "$codepath/0_prog_master_vfinal.do"

*First and last claim year of the inpatient sample;
local firstyr 2012
local lastyr 2013
*Claim-level variables read from the yearly inpatient claim files;
local vars bene_id clm_id admsn_dt dschrgdt provider typesrvc fac_type from_dt thru_dt drg_cd
*Beneficiary-level variables read from the yearly beneficiary summary files;
local bm_vars bene_id buyin* sex race death_dt bene_dob 

********************************************************************************
*Step 1: Preamble - prepare temp utility files;

*Grab AHAID from the deals file;

*Build a provider-level crosswalk (provider number to AHAID and HRR code) that
*is merged onto the claims in Step 2;
use "$datapath/aha_merge6_1", clear

ren old_num provider
tostring provider, force replace 
*Left-pad 5-character provider numbers with a zero so they are 6 characters;
replace provider = "0"+provider if length(provider)==5

keep provider ahaid hrrcode year
	
*keep unique providers;
*Order the rows inside each provider before keeping the first one. Without a
*within-group order, which row survives for a provider that has more than one
*ahaid or hrrcode is arbitrary and can differ between runs.
*NOTE(review): earliest year, then lowest ahaid and hrrcode, is a reviewer
*choice made to get a reproducible result - confirm it matches the intent;
bysort provider (year ahaid hrrcode): keep if _n==1 
drop year

tempfile hosplist
save `hosplist', replace 


*Prepare files to merge DRG weights;

*2010 table: the first spreadsheet row holds the column names. Lower-case all
*names, then keep the rows that carry a numeric DRG code;
import excel using "$datapath/utility/table5FR-2010.xls", firstrow clear
rename *, lower
rename (msdrg weights) (drg_cd drgwt)
destring drg_cd, replace force
drop if missing(drg_cd)
tempfile drgwt10
save `drgwt10', replace

*2015 table: the columns used here are imported under the names TABLE and G.
*The first data row and columns J through N are discarded before the two
*remaining columns of interest are converted to numeric;
import excel using "$datapath/utility/table5FR-2015.xlsx", firstrow clear
drop in 1
drop J-N
rename (TABLE G) (drg_cd drgwt)
keep drg_cd drgwt
destring drg_cd drgwt, replace force
drop if missing(drg_cd)
tempfile drgwt15
save `drgwt15', replace

********************************************************************************
*Step 2: grab nondef admissions for all hospitals in desired period;

*For each claim year: flag claims that carry an ED revenue center, load the
*claim file, attach the AHA crosswalk and the ED flag, apply the stay and
*facility filters, keep the non-deferrable admissions, attach DRG weights and
*Elixhauser flags, and stack the years into a single file;
forval yr =`firstyr'(1)`lastyr'{

	di "*******************"
	di "Year = `yr'"
	
	*The revenue file below is opened by its bare name, so the working
	*directory has to be the claim-year folder;
	cd "$claims/ip/`yr'/"
		
	*identify cases that originated in the ed;
	use if (inrange(rev_cntr,"0450","0459") | (rev_cntr=="0981")) ///
		using ipr`yr', clear  
		
	gen ed = 1 
	
	di "Keep only one row per claim"
	bys bene_id clm_id: keep if _n==1
	
	keep bene_id clm_id ed 
	
	di "number of ED visits"
	count 
	
	di "save revenue file with ed dummy"
	tempfile ip_ed`yr'
	save `ip_ed`yr'', replace
	
	use `vars' icd_dgns_cd* using "$claims/ip/`yr'/ipc`yr'", clear
	gen prncpal_dgns_cd = icd_dgns_cd1
	
	*limit to first 10 dx codes to be consistent over full period;
	*Drop one variable at a time: a single capture drop over the whole list
	*removes nothing at all when any one of the listed variables is absent,
	*and the capture hides that failure;
	forval k = 11/25 {
		cap drop icd_dgns_cd`k'
	}
	
	di "bring in AHAID"
	*Claims whose provider is not in the crosswalk are dropped;
	merge m:1 provider using `hosplist', keepusing(ahaid hrrcode)
	keep if _m==3
	drop _m
		
	di "merge ed indicator"
	merge m:1 bene_id clm_id using `ip_ed`yr'', keepusing(ed)
	drop if _m==2
	drop _m
	
	*Claims with no ED revenue line get ed = 0;
	recode ed (missing=0)
		
	di "Replace missing admission/discharge dates"
	replace dschrgdt= thru_dt if dschrgdt==.
	replace admsn_dt=from_dt if admsn_dt==.
	drop from_dt thru_dt

	di "Drop stays longer than a year"
	*A stay whose dates are still missing evaluates to missing here and is
	*dropped as well;
	keep if (dschrgdt - admsn_dt+1)<=365
	
	di "Keep only facility type hospital"
	keep if inlist(fac_type,"1","8")
	
	di "Keep only type of service: inpatient"
	keep if inlist(typesrvc,"1")
	
	drop fac_type typesrvc
	
	gen icd10ind= dschrgdt>=td(1oct2015)
	
	di "create flag if non-deferrable admission"
	*ident_nondef is not defined in this file; the lines below rely on it
	*having created i_nondef;
	ident_nondef
	
	gen ed_nondef = ed * i_nondef
	
	gen dschrgqtr = qofd(dschrgdt)
	format dschrgqtr %tq
	
	di "number of nondef cases by quarter"
	tab dschrgqtr if i_nondef==1
	
	di "only retain non-deferrable admissions"
	keep if i_nondef==1
	
	*Keep the DRG code as read from the claim file before converting it;
	gen drg_str = drg_cd
		
	destring drg_cd, force replace

	*Claim years up to 2012 use the 2010 weight table, later years the
	*2015 table;
	if inrange(`yr',2008,2012){
		di "Merge DRG weight"	
		merge m:1 drg_cd using `drgwt10', keepusing(drgwt)
		drop if _m==2
		drop _m		
	}
	
	if inrange(`yr',2013,2017){
		di "Merge DRG weight"	
		merge m:1 drg_cd using `drgwt15', keepusing(drgwt)
		drop if _m==2
		drop _m		
	}
	*drg loop ends;
	
	*Create flag for elixhauser comorbidities;
	qui elixhauser icd_dgns_cd*, index(e) smelix 
	drop weightel*
		
	*The first year starts the stacked file; each later year is appended;
	if `yr'==`firstyr'{
		tempfile ip_sample
		save `ip_sample', replace
	}
	if `yr'>`firstyr'{
		tempfile file`yr'
		save `file`yr'', replace
		use `ip_sample', clear
		append using `file`yr''
		save `ip_sample', replace
	}
}
*year loop ends;

sort bene_id admsn_dt dschrgdt 
gen year = year(dschrgdt)

save "$datapath/desc_sample", replace

********************************************************************************
*Step 3: Grab demographics, birth/death, and enrollment info;

*Create beneficiary master file;
*The master file spans one year before and one year after the claim window;
local prevyr = `firstyr'-1	
local nextyr = `lastyr'+1	

forval yr=`prevyr'(1)`nextyr'{
	
	di "*******************"
	di "Year = `yr'"
	
	use `bm_vars' using "$claims/bsfbase/`yr'/bsfab`yr'.dta", clear
	merge 1:m bene_id using "$datapath/desc_sample", keepusing(i_nondef)
	
	di "Only keep patients appearing in the inpatient sample"
	keep if _m==3
	drop _merge i_nondef
	
	*The 1:m merge repeats a beneficiary once per claim; keep one row;
	duplicates drop bene_id, force 
	
	gen year = `yr'
	
	*Collapse the monthly buy-in codes into one string per beneficiary-year;
	egen buyin = concat(buyin01-buyin12) 
	drop buyin01-buyin12 buyin_mo 
	
	if `yr'==`prevyr'{
		tempfile master
		save `master', replace		
	}
	if `yr'>`prevyr'{
		tempfile master`yr'
		save `master`yr'', replace
		use `master', clear
		append using `master`yr''
		save `master', replace
	}
	
}
*year loop ends;	

sort bene_id year
by bene_id: egen frstyr = min(year)
by bene_id: egen lastyr = max(year)

di "Update death dates to incorporate info from earlier year"
*Keep the dates as reported before they are overwritten. The update flag used
*to be computed from the carried-forward dates, which all equal the final date
*by construction, so the flag could never be 1;
clonevar raw_death_dt = death_dt
by bene_id: replace death_dt = death_dt[_n-1] if death_dt[_n-1]!=.
by bene_id: gen final_deathdt = death_dt[_N]
format %td final_deathdt bene_dob
*Flag beneficiaries for whom any year reports a death date that differs from
*the final one;
gen updated_death = raw_death_dt!=final_deathdt & raw_death_dt!=. & final_deathdt!=.
by bene_id: egen ind_update_dth = max(updated_death)
drop updated_death death_dt raw_death_dt

di "Edit sex and race to be constant within a person based on first obs"
*Forward fill, then back-fill the rows that precede the first reported value.
*After the forward fill the last row holds the first reported value. Without
*the back-fill a beneficiary whose first year is blank keeps two distinct
*values, becomes two rows in the reshape below, and bene_id stops being a
*unique key for this file;
by bene_id: replace sex = sex[_n-1] if sex[_n-1]!=""
by bene_id: replace sex = sex[_N] if sex==""
by bene_id: replace race = race[_n-1] if race[_n-1]!=""
by bene_id: replace race = race[_N] if race==""
by bene_id: replace bene_dob = bene_dob[_n-1] if bene_dob[_n-1]!=.
by bene_id: replace bene_dob = bene_dob[_N] if bene_dob==.

reshape wide buyin, i(bene_id sex race bene_dob final_deathdt) j(year)

*Years in which the beneficiary has no record get a 12-character placeholder,
*so every yearly string covers 12 months;
forval yr=`prevyr'(1)`lastyr'{	
	replace buyin`yr'="XXXXXXXXXXXX" if buyin`yr'==""
}

save "$datapath/desc_bene_master", replace

*------------------------------------------------------------------------------;
*Now check enrollment;
*Length in months of the look-back window that ends in the admission month;
local back_wind_len 12
set more off

use "$datapath/desc_sample", clear
merge m:1 bene_id using "$datapath/desc_bene_master", keepusing(final_deathdt bene_dob buyin* frstyr lastyr sex)

di "keep only those discharges for which we have enrollment info"
keep if _m==3
drop _merge

gen ageatadmsn = int((admsn_dt - bene_dob)/365.25)

di "Drop patients younger than 65 at the time of admission"
drop if ageatadmsn<65

*pre_enr is 0 once a disqualifying condition is found and stays missing
*otherwise;
di "1. Set enr=0 if last year of bene master file ends before admission begins"
gen pre_enr=0 if lastyr < year(admsn_dt)
tab pre_enr

*One character per month, built from the yearly strings in variable order;
egen buyin = concat(buyin*)
drop buyin`prevyr'-buyin`lastyr'

*Month positions are measured from January of the first master year. The
*yearly strings are padded with X for years without a record, so the
*concatenated string starts in that year for every beneficiary. Measuring from
*frstyr shifted the window by 12 months per missing leading year for anyone
*first observed later.
*NOTE(review): this changes which admissions count as enrolled for
*beneficiaries first observed after the first master year - confirm;
gen frst_mth = mofd(mdy(1,1,`prevyr'))
gen admsn_mth_pos = mofd(admsn_dt) - frst_mth +1
gen dschrg_mth_pos = mofd(dschrgdt) - frst_mth +1

di "2. set pre_enr=0 if look back period is less than a year"
replace pre_enr=0 if admsn_mth_pos < `back_wind_len'

*The window is the admission month and the months before it, up to the
*window length;
gen back_window = "invalid" if pre_enr==0
replace back_window = substr(buyin,(admsn_mth_pos-`back_wind_len'+1),`back_wind_len') if pre_enr==.

di "3a. Not enrolled if not in parts A and B for 12 months prior to admission"
*Any month coded 0, 1, 2, A or B, or padded with X, disqualifies the window;
replace pre_enr=0 if regexm(back_window,"[012ABX]")

di "4. All remaining benes are enrolled as desired"	
gen enr=0
replace enr=1 if pre_enr==. 

save "$datapath/desc_enr_sample", replace

********************************************************************************
*Step 4: grab history/future inpatient stays and dx details for all relevant patients;

di "grab history for these patients"
*Reduce the data in memory to one row per beneficiary in the sample;
bys bene_id: keep if _n==1 
keep bene_id 

tempfile patlist
save `patlist', replace

*NOTE(review): the loop stops at the last claim year, so a stay that begins
*after that year is not in the history file and cannot be flagged as a
*readmission for discharges late in the last year - confirm this is intended;
forval yr=`prevyr'(1)`lastyr'{
	
	di "*******************"
	di "Year = `yr'"
	
	use bene_id clm_id admsn_dt dschrgdt icd_dgns_cd* using "$claims/ip/`yr'/ipc`yr'", clear
	gen prncpal_dgns_cd = icd_dgns_cd1 
		
	di "only retain patients in the non-deferrable sample"	
	merge m:1 bene_id using `patlist'
	keep if _m==3
	drop _m
	
	gen icd10ind = dschrgdt>=td(1oct2015)
	
	*limit to first 10 dx codes to be consistent over full period;
	*Drop one variable at a time: a single capture drop over the whole list
	*removes nothing at all when any one of the listed variables is absent,
	*and the capture hides that failure;
	forval k = 11/25 {
		cap drop icd_dgns_cd`k'
	}
	
	di "create elixhauser flags"
	*create flag for elixhauser comorbidities;
	qui elixhauser icd_dgns_cd*, index(e) smelix 
	drop weightel*	
			
	*Prefix the flags so they do not collide with the index-stay flags;
	forval i=1/31{
		ren ynel`i' histynel`i'
	}
	
	*The first year starts the stacked file; each later year is appended;
	if `yr'==`prevyr'{
		tempfile history
		save `history', replace
	}
	else{
		tempfile hist`yr'
		save `hist`yr'', replace
		use `history', clear
		append using `hist`yr''
		save `history', replace
	}
}

sort bene_id dschrgdt 
ren dschrgdt histdschrg
ren admsn_dt histadmsn
ren prncpal_dgns_cd histprdx
ren elixsum histelsum
drop icd_dgns_cd* 


save "$datapath/history_desc_sample", replace
*this file is called history but also has future stays which can be used to flag readmissions;

********************************************************************************
*Step 5: create history flags;
use "$datapath/desc_enr_sample", clear

*Look-back window: the 365 days that end the day before admission;
gen low = admsn_dt - 365
gen high = admsn_dt-1 

di "merge history file with main file and grab 365-day inpatient history"

*Each index claim is paired with every history stay of the same beneficiary
*that was discharged inside the window;
rangejoin histdschrg low high using "$datapath/history_desc_sample", by(bene_id) keepusing(histdschrg histynel* histelsum)

sort bene_id clm_id histdschrg

di "create flag for inpatient use in the past 90 days"
*One claim-level flag per horizon: 1 when any paired history discharge falls
*within that many days before admission. A claim with no paired stay has a
*missing difference, which fails the comparison and yields 0;
local horizons 30 90 365
local suffixes 30d 90d 1y
forval j = 1/3 {
	local days : word `j' of `horizons'
	local sfx : word `j' of `suffixes'
	gen i_hist`sfx' = admsn_dt - histdschrg <= `days'
	by bene_id clm_id: egen hist`sfx' = max(i_hist`sfx')
	drop i_hist`sfx'
}

*Back to one row per claim: a comorbidity counts when any paired history stay
*has it; the horizon flags and admission date are constant within a claim;
gcollapse (max) histynel* (mean) hist30d hist90d hist1y admsn_dt, by(bene_id clm_id)

*Create sum of historic elixhauser flags;
egen histelsum = rowtotal(histynel1-histynel31)

tempfile histcollapse
save `histcollapse', replace 

********************************************************************************
*Step 6: create readmission flags;
use "$datapath/desc_enr_sample", clear

*Look-ahead window: day 1 through day 90 after discharge;
gen low = dschrgdt + 1 
gen high = dschrgdt + 90

di "merge history file with main file and grab 90-day readmissions"

*Each index claim is paired with every stay of the same beneficiary that was
*admitted inside the window;
rangejoin histadmsn low high using "$datapath/history_desc_sample", by(bene_id) keepusing(histadmsn)

sort bene_id clm_id histadmsn

di "create flags for readmission at different durations"
*Days from the index discharge to each later admission; missing when the
*claim has no paired stay;
gen diff_readm = histadmsn - dschrgdt
by bene_id clm_id: egen min_diff_readm = min(diff_readm)

di "create flags for readmission at different durations"
local durns 90
foreach durn of local durns{
	*Count the days from discharge, as the join window and diff_readm do.
	*Counting from the admission date added the index length of stay, so a
	*readmission inside the window was missed after a long index stay. A
	*missing difference fails the comparison and yields 0;
	gen i_readm`durn'd = diff_readm <= `durn'
	by bene_id clm_id: egen readm`durn'd = max(i_readm`durn'd) 
}
drop i_readm*

*Back to one row per claim; every variable kept is constant within a claim;
gcollapse (mean) readm*d dschrgdt min_diff_readm, by(bene_id clm_id)

tempfile readm
save `readm', replace 

********************************************************************************
*Step 7: merge history and readmission flags back into main file;

use "$datapath/desc_enr_sample", clear

di "merge history of 90-d inpatient use and 1 yr elixhauser dummies"
*Keep every index claim; rows found only in the flag file are discarded;
merge 1:1 bene_id clm_id admsn_dt using `histcollapse', keepusing(histynel* histelsum hist30d hist90d hist1y) keep(master match) nogenerate

*This message used to repeat the history text although the step merges the
*readmission flags;
di "merge readmission flags and days to first readmission"
merge 1:1 bene_id clm_id dschrgdt using `readm', keepusing(readm*d min_diff_readm) keep(master match) nogenerate

*Remove the secondary diagnosis codes and the enrollment work variables;
drop icd_dgns_cd2-icd_dgns_cd10 admsn_mth_pos dschrg_mth_pos back_window pre_enr buyin frstyr lastyr

di "save final file with death date, admsn_dt, historical use, elixhauser dummies based on historical use, and readmission flags"

save "$datapath/desc_enr_hist_readm_sample", replace

*END CODE;
